Passengers Data

Load data

dat <- import(here("data", "dat.csv")) %>% 
   filter(Category=="Passenger") %>% 
   clean_names() %>% 
   mutate_all(na_if,"") %>% 
   drop_na(survived, gender, class, age)

Clean data

dat$gender <- as.factor(dat$gender)
dat$marital_status <- as.factor(dat$marital_status)
dat$category <- as.factor(dat$category)
dat$class <- as.factor(dat$class)
dat$survived <- as.factor(dat$survived)
dat$embarked <- as.factor(dat$embarked)
dat$disembarked  <- as.factor(dat$disembarked)

# # PEER Review MV: You could consider using dplyr to recode these new variables and then also creating factors that make more sense for some of the variables that have multiple variables. For example, numbering factors based on frequency for marital status. This probably isn't very helpful but a little cleaner than code above.
# dat <- dat %>%
#    mutate(gender = as.factor(gender),
#           marital_status = fct_infreq(marital_status),
#           category = as.factor(category),
#           class = as.factor(class),
#           survived = as.factor(survived),
#           embarked = as.factor(embarked),
#           disembarked = as.factor(disembarked))

dat <- dat %>% 
 mutate(nationality2 = case_when(nationality == "English" ~ "English",
   nationality == "Irish" ~ "Irish",
   nationality == "American" ~ "American",
   nationality == "Swedish" ~ "Swedish",
   nationality == "Finnish" ~ "Finnish",
   nationality == "Scottish" ~ "Scottish",
   nationality == "French" ~ "French",
   nationality == "Italian" ~ "Italian",
   nationality == "Canadian" ~ "Canadian",
   nationality == "Bulgarian" ~ "Bulgarian",
   nationality == "Croatian" ~ "Croatian",
   nationality == "Belgian" ~ "Belgian",
   nationality == "Norwegian" ~ "Norwegian",
   nationality == "Channel Islander" ~ "Channel Islander",
   nationality == "Welsh" ~ "Welsh",
   nationality == "Swiss" ~ "Swiss",
   nationality == "German" ~ "German",
   nationality == "Danish" ~ "Danish",
   nationality == "Spanish" ~ "Spanish",
   nationality == "Australian" ~ "Australian",
   nationality == "Polish" ~ "Polish",
   nationality == "South African" ~ "South African",
   nationality == "Bosnian" ~ "Bosnian",
   nationality == "Hong Kongese" ~ "Hong Kongese",
   nationality == "Dutch" ~ "Dutch",
   nationality == "Lithuanian" ~ "Lithuanian",
   nationality == "Greek" ~ "Greek",
   nationality == "Portuguese" ~ "Portuguese",
   nationality == "Uruguayan" ~ "Uruguayan",
   nationality == "Chinese" ~ "Chinese",
   nationality == "Slovenian" ~ "Slovenian",
   nationality == "Cape Verdean" ~ "Cape Verdean",
   nationality == "Egyptian" ~ "Egyptian",
   nationality == "Japanese" ~ "Japanese",
   nationality == "Hungarian" ~ "Hungarian",
   nationality == "Bosnian" ~ "Bosnian",
   nationality == "Hong Kongese" ~ "Hong Kongese",
   nationality == "Latvian" ~ "Latvian",
   nationality == "Austrian" ~ "Austrian",
   nationality == "Greek" ~ "Greek",
   nationality == "Mexican" ~ "Mexican",
   nationality == "Sweden" ~ "Swedish",
   nationality == "Turkish" ~ "Turkish",
   nationality == "Slovenian" ~ "Slovenian",
   nationality == "Guyanese" ~ "Guyanese",
   nationality == "Haitian" ~ "Haitian",
   nationality == "Syrian,Lebanese" ~ "Syrian/Lebanese",
   nationality == "Unknown" ~ "Unknown",
   TRUE ~ "Other - Multiple", ))

## PEER Review MV: For Nationality, Consider creating a more collapsed factor variable that only has those nationalities with 10 or more individuals and then an other category. Then you could potentially create a cleaner bar graph. Added some code below
# dat <- dat %>%
#    mutate(nationality_cat = fct_lump_min(nationality,10),
#           nationality_cat = fct_infreq(nationality_cat))

dat <- dat %>% 
   mutate(nationality2 = ifelse(nationality2 == "Unknown", NA, nationality2))

Descriptives

# Breakdown of passengers by class
dat %>% 
   group_by(class) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   adorn_totals() %>%
   kable(caption = "Breakdown of Passengers by Class",
       col.names = c("Class", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
 kable_classic(full_width = F, html_font = "Cambria")
Breakdown of Passengers by Class
Class Count Percent
1st Class 324 24.64
2nd Class 284 21.60
3rd Class 707 53.76
Total 1315 100.00
# Breakdown of passengers by class and gender
dat %>% 
   group_by(class, gender) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>%
   kable(caption = "Breakdown of Passengers by Class and Gender",
       col.names = c("Class", "Gender", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Breakdown of Passengers by Class and Gender
Class Gender Count Percent
1st Class Female 144 44.44
1st Class Male 180 55.56
2nd Class Female 106 37.32
2nd Class Male 178 62.68
3rd Class Female 216 30.55
3rd Class Male 491 69.45
# Breakdown of passenger nationalities
dat %>% 
   filter(!is.na(nationality2)) %>% 
   group_by(nationality2) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(desc(percent)) %>%
   kable(caption = "Breakdown of Passenger Nationalities",
       col.names = c("Nationality", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Breakdown of Passenger Nationalities
Nationality Count Percent
English 295 22.43
American 242 18.40
Irish 122 9.28
Other - Multiple 108 8.21
Swedish 100 7.60
Syrian/Lebanese 85 6.46
Finnish 58 4.41
Canadian 37 2.81
Bulgarian 31 2.36
Croatian 28 2.13
French 26 1.98
Norwegian 26 1.98
Belgian 25 1.90
Scottish 17 1.29
Channel Islander 15 1.14
Swiss 13 0.99
Danish 10 0.76
Italian 9 0.68
German 8 0.61
Spanish 8 0.61
Welsh 8 0.61
Polish 6 0.46
Bosnian 4 0.30
Hong Kongese 4 0.30
South African 4 0.30
Greek 3 0.23
Lithuanian 3 0.23
Uruguayan 3 0.23
Australian 2 0.15
Chinese 2 0.15
Portuguese 2 0.15
Slovenian 2 0.15
Austrian 1 0.08
Dutch 1 0.08
Egyptian 1 0.08
Haitian 1 0.08
Hungarian 1 0.08
Japanese 1 0.08
Latvian 1 0.08
Mexican 1 0.08
Turkish 1 0.08
# Breakdown of passenger nationalities by class
# # PEER Review MV: - Consider visualizing some of your data that is currently in tables into bar graphs. This one doesn't have all the info on the table, but easier to see which passengers were most represented
# dat %>%
#    filter(!is.na(nationality2)) %>% 
#    group_by(nationality_cat) %>% 
#    summarize(count = n()) %>% 
#    mutate(percent = (count/sum(count))*100) %>% 
#    arrange((percent)) %>%
#    ggplot(aes(y = nationality_cat)) + 
#    geom_col(aes(x = percent), fill = "dark red") + 
#    geom_text(aes(x = percent, label=round(percent,2)), hjust = -.2, size = 3) +
#    theme_minimal() + 
#    labs(x = "Percentage of all passenges", y = "Nationality")

# Breakdown of passenger nationalities by class (all)
dat %>% 
   filter(!is.na(nationality2)) %>% 
   group_by(class, nationality2) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(class, desc(percent)) %>%
   kable(caption = "Breakdown of Passenger Nationalities by Class (All)",
       col.names = c("Class", "Nationality", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Breakdown of Passenger Nationalities by Class (All)
Class Nationality Count Percent
1st Class American 195 60.19
1st Class English 38 11.73
1st Class Canadian 27 8.33
1st Class Other - Multiple 14 4.32
1st Class French 10 3.09
1st Class Swiss 6 1.85
1st Class German 5 1.54
1st Class Irish 5 1.54
1st Class Spanish 4 1.23
1st Class Swedish 4 1.23
1st Class Scottish 3 0.93
1st Class Uruguayan 3 0.93
1st Class Belgian 2 0.62
1st Class Italian 2 0.62
1st Class Channel Islander 1 0.31
1st Class Dutch 1 0.31
1st Class Egyptian 1 0.31
1st Class Mexican 1 0.31
1st Class Norwegian 1 0.31
1st Class Polish 1 0.31
2nd Class English 145 51.06
2nd Class Other - Multiple 25 8.80
2nd Class American 24 8.45
2nd Class Channel Islander 12 4.23
2nd Class Irish 12 4.23
2nd Class French 11 3.87
2nd Class Scottish 8 2.82
2nd Class Finnish 6 2.11
2nd Class Swedish 6 2.11
2nd Class Canadian 5 1.76
2nd Class South African 4 1.41
2nd Class Spanish 4 1.41
2nd Class Danish 3 1.06
2nd Class Italian 3 1.06
2nd Class Lithuanian 2 0.70
2nd Class Swiss 2 0.70
2nd Class Syrian/Lebanese 2 0.70
2nd Class Welsh 2 0.70
2nd Class Australian 1 0.35
2nd Class Belgian 1 0.35
2nd Class German 1 0.35
2nd Class Haitian 1 0.35
2nd Class Hungarian 1 0.35
2nd Class Japanese 1 0.35
2nd Class Norwegian 1 0.35
2nd Class Portuguese 1 0.35
3rd Class English 112 15.84
3rd Class Irish 105 14.85
3rd Class Swedish 90 12.73
3rd Class Syrian/Lebanese 83 11.74
3rd Class Other - Multiple 69 9.76
3rd Class Finnish 52 7.36
3rd Class Bulgarian 31 4.38
3rd Class Croatian 28 3.96
3rd Class Norwegian 24 3.39
3rd Class American 23 3.25
3rd Class Belgian 22 3.11
3rd Class Danish 7 0.99
3rd Class Scottish 6 0.85
3rd Class Welsh 6 0.85
3rd Class Canadian 5 0.71
3rd Class French 5 0.71
3rd Class Polish 5 0.71
3rd Class Swiss 5 0.71
3rd Class Bosnian 4 0.57
3rd Class Hong Kongese 4 0.57
3rd Class Italian 4 0.57
3rd Class Greek 3 0.42
3rd Class Channel Islander 2 0.28
3rd Class Chinese 2 0.28
3rd Class German 2 0.28
3rd Class Slovenian 2 0.28
3rd Class Australian 1 0.14
3rd Class Austrian 1 0.14
3rd Class Latvian 1 0.14
3rd Class Lithuanian 1 0.14
3rd Class Portuguese 1 0.14
3rd Class Turkish 1 0.14
# Trying pivot wider
dat_class_tidy <- dat %>% 
   filter(category == "Passenger") %>% 
   filter(!is.na(nationality2)) %>% 
   group_by(class, nationality2) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(class, desc(percent)) %>%
 pivot_wider(
  names_from = "class",
  values_from = "class"
 ) %>%
 kable(caption = "Breakdown of Passenger Nationalities by Class (Wider)",
       col.names = c("Nationality", "Count", "Percent", "1st Class", "2nd Class", "3rd Class"),
       digits = 2,
       booktabs = TRUE) %>%
 kable_styling

dat_class_tidy
Breakdown of Passenger Nationalities by Class (Wider)
Nationality Count Percent 1st Class 2nd Class 3rd Class
American 195 60.19 1st Class NA NA
English 38 11.73 1st Class NA NA
Canadian 27 8.33 1st Class NA NA
Other - Multiple 14 4.32 1st Class NA NA
French 10 3.09 1st Class NA NA
Swiss 6 1.85 1st Class NA NA
German 5 1.54 1st Class NA NA
Irish 5 1.54 1st Class NA NA
Spanish 4 1.23 1st Class NA NA
Swedish 4 1.23 1st Class NA NA
Scottish 3 0.93 1st Class NA NA
Uruguayan 3 0.93 1st Class NA NA
Belgian 2 0.62 1st Class NA NA
Italian 2 0.62 1st Class NA NA
Channel Islander 1 0.31 1st Class NA NA
Dutch 1 0.31 1st Class NA NA
Egyptian 1 0.31 1st Class NA NA
Mexican 1 0.31 1st Class NA NA
Norwegian 1 0.31 1st Class NA NA
Polish 1 0.31 1st Class NA NA
English 145 51.06 NA 2nd Class NA
Other - Multiple 25 8.80 NA 2nd Class NA
American 24 8.45 NA 2nd Class NA
Channel Islander 12 4.23 NA 2nd Class NA
Irish 12 4.23 NA 2nd Class NA
French 11 3.87 NA 2nd Class NA
Scottish 8 2.82 NA 2nd Class NA
Finnish 6 2.11 NA 2nd Class NA
Swedish 6 2.11 NA 2nd Class NA
Canadian 5 1.76 NA 2nd Class NA
South African 4 1.41 NA 2nd Class NA
Spanish 4 1.41 NA 2nd Class NA
Danish 3 1.06 NA 2nd Class NA
Italian 3 1.06 NA 2nd Class NA
Lithuanian 2 0.70 NA 2nd Class NA
Swiss 2 0.70 NA 2nd Class NA
Syrian/Lebanese 2 0.70 NA 2nd Class NA
Welsh 2 0.70 NA 2nd Class NA
Australian 1 0.35 NA 2nd Class NA
Belgian 1 0.35 NA 2nd Class NA
German 1 0.35 NA 2nd Class NA
Haitian 1 0.35 NA 2nd Class NA
Hungarian 1 0.35 NA 2nd Class NA
Japanese 1 0.35 NA 2nd Class NA
Norwegian 1 0.35 NA 2nd Class NA
Portuguese 1 0.35 NA 2nd Class NA
English 112 15.84 NA NA 3rd Class
Irish 105 14.85 NA NA 3rd Class
Swedish 90 12.73 NA NA 3rd Class
Syrian/Lebanese 83 11.74 NA NA 3rd Class
Other - Multiple 69 9.76 NA NA 3rd Class
Finnish 52 7.36 NA NA 3rd Class
Bulgarian 31 4.38 NA NA 3rd Class
Croatian 28 3.96 NA NA 3rd Class
Norwegian 24 3.39 NA NA 3rd Class
American 23 3.25 NA NA 3rd Class
Belgian 22 3.11 NA NA 3rd Class
Danish 7 0.99 NA NA 3rd Class
Scottish 6 0.85 NA NA 3rd Class
Welsh 6 0.85 NA NA 3rd Class
Canadian 5 0.71 NA NA 3rd Class
French 5 0.71 NA NA 3rd Class
Polish 5 0.71 NA NA 3rd Class
Swiss 5 0.71 NA NA 3rd Class
Bosnian 4 0.57 NA NA 3rd Class
Hong Kongese 4 0.57 NA NA 3rd Class
Italian 4 0.57 NA NA 3rd Class
Greek 3 0.42 NA NA 3rd Class
Channel Islander 2 0.28 NA NA 3rd Class
Chinese 2 0.28 NA NA 3rd Class
German 2 0.28 NA NA 3rd Class
Slovenian 2 0.28 NA NA 3rd Class
Australian 1 0.14 NA NA 3rd Class
Austrian 1 0.14 NA NA 3rd Class
Latvian 1 0.14 NA NA 3rd Class
Lithuanian 1 0.14 NA NA 3rd Class
Portuguese 1 0.14 NA NA 3rd Class
Turkish 1 0.14 NA NA 3rd Class
# PEER Review MV: Here, I think another good opportunity to visualize tables that intersect Nationality and class. I think you are able to more easily see variation in nationality by class. Interesting how Americans were concentrated in first class, and third class varied much more.
# dat %>% 
#    filter(category == "Passenger") %>% 
#    filter(!is.na(nationality2)) %>% 
#    group_by(class, nationality_cat) %>% 
#    summarize(count = n()) %>% 
#    mutate(percent = (count/sum(count))*100) %>% 
#    arrange(class, desc(percent)) %>%
#    ggplot(aes(y = nationality_cat)) + 
#    geom_col(aes(x = percent), fill = "dark red") + 
#    facet_wrap(~fct_infreq(class)) +
#    geom_text(aes(x = percent, label=round(percent,2)), hjust = -.1, size = 3) +
#    theme_minimal() +
#    labs(x = "Percent of passengers by class", y = "Nationality")



# Average age by class
dat %>% 
   group_by(class) %>% 
   summarize(avg_age = mean(age), std_age = sd(age), min_age = min(age), 
             max_age = max(age)) %>%
   kable(caption = "Average Age by Class",
       col.names = c("Class", "Average Age", "SD Age", "Minimum Age", "Maximum Age"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Average Age by Class
Class Average Age SD Age Minimum Age Maximum Age
1st Class 39.14 13.55 0 71
2nd Class 30.01 13.90 0 71
3rd Class 25.12 11.71 0 74

Examining Survival

Survival rates

# Survival rate by class
dat %>% 
   group_by(class, survived) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(class, survived) %>%
   kable(caption = "Survival Rate by Class",
       col.names = c("Class", "Survived", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Survival Rate by Class
Class Survived Count Percent
1st Class Lost 123 37.96
1st Class Saved 201 62.04
2nd Class Lost 166 58.45
2nd Class Saved 118 41.55
3rd Class Lost 526 74.40
3rd Class Saved 181 25.60
# Survival rate by gender
dat %>% 
   group_by(gender, survived) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(gender, survived) %>%
   kable(caption = "Survival Rate by Gender",
       col.names = c("Gender", "Survived", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Survival Rate by Gender
Gender Survived Count Percent
Female Lost 127 27.25
Female Saved 339 72.75
Male Lost 688 81.04
Male Saved 161 18.96
# Survival rate by class and gender
dat %>% 
   group_by(class, gender, survived) %>% 
   summarize(count = n()) %>% 
   mutate(percent = (count/sum(count))*100) %>% 
   arrange(class, gender) %>%
   kable(caption = "Survival Rate by Class and Gender",
       col.names = c("Class", "Gender", "Survived", "Count", "Percent"),
       digits = 2,
       booktabs = TRUE) %>%
   kable_styling()
Survival Rate by Class and Gender
Class Gender Survived Count Percent
1st Class Female Lost 5 3.47
1st Class Female Saved 139 96.53
1st Class Male Lost 118 65.56
1st Class Male Saved 62 34.44
2nd Class Female Lost 12 11.32
2nd Class Female Saved 94 88.68
2nd Class Male Lost 154 86.52
2nd Class Male Saved 24 13.48
3rd Class Female Lost 110 50.93
3rd Class Female Saved 106 49.07
3rd Class Male Lost 416 84.73
3rd Class Male Saved 75 15.27

Density ridges

## PEER Review MV: Cool graphs! 

surv_classhist <- dat %>% 
   ggplot(aes(age, class)) +
   geom_density_ridges(aes(fill = factor(survived))) +
   labs(title = "Age Distribution of Survival Status By Class", 
   x = "Age Distribution", y = "Passenger Class") +
   theme_minimal() +
   theme(plot.title = element_text(hjust = 0.5))

surv_classhist + scale_fill_manual(name = "Survival", values = c("blue","grey"))

surv_agehist <- dat %>% 
   ggplot(aes(age, gender)) +
   geom_density_ridges(aes(fill = factor(survived))) +
   labs(title = "Age Distribution of Survival Status By Gender", 
   x = "Age Distribution", y = "Passenger Gender") +
   theme_minimal() +
   theme(plot.title = element_text(hjust = 0.5))

surv_agehist + scale_fill_manual(name = "Survival", values = c("blue","grey"))

surv_ageclass_hist <- dat %>% 
   ggplot(aes(age, gender)) +
   facet_wrap(~class, nrow=3) +
   geom_density_ridges(aes(fill = factor(survived))) +
   labs(title = "Age Distribution of Survival Status By Class and Gender", 
   x = "Age Distribution", y = "Passenger Gender") +
   theme_minimal() +
   theme(plot.title = element_text(hjust = 0.5))

surv_ageclass_hist + scale_fill_manual(name = "Survival", values = c("blue","grey"))

Predicting survival

modeldat <- dat %>% 
   select(survived, gender, class, age)

ctree <- ctree(survived ~ gender + class + age, data=modeldat)
plot(ctree)

# PEER Review MV: I am not at all familiar with the ctree command or what it is doing but it appears to be predicting survival rates for each combination of categories as you go down the flowcharts. However, I'm not sure why class appears twice at different levels. If I follow the left most line, it makes sense that female and first class would have highest probability of save rate. But not sure why class appears twice along the same hierarchy. There must be some reasoning behind this but it will be important to clearly explain this in the final write up.

Tickets Prices

Load data

fares <- import(here("data", "avgfare.csv")) %>% 
   clean_names()

fares$accommodation <- as.factor(fares$accommodation)
fares$accommodation <- factor(fares$accommodation, levels = c("First-class parlor suite", "First-class cabin", "Second-class cabin", "Third-class cabin"))

Calculate inflation

## PEER Review MV: Would be great to know where these numbers are coming from, a little bit more annotation would be helpful.

p1921 <- (17.9/9.7)
fares$fare_1921 <- p1921*fares$fare_1912
fares$fare_1921 <- round(fares$fare_1921, 2)

p1931 <- (15.2/9.7)
fares$fare_1931 <- p1931*fares$fare_1912
fares$fare_1931 <- round(fares$fare_1931, 2)

p1941 <- (14.7/9.7)
fares$fare_1941 <- p1941*fares$fare_1912
fares$fare_1941 <- round(fares$fare_1941, 2)

p1951 <- (26.0/9.7)
fares$fare_1951 <- p1951*fares$fare_1912
fares$fare_1951 <- round(fares$fare_1951, 2)

p1961 <- (29.9/9.7)
fares$fare_1961 <- p1961*fares$fare_1912
fares$fare_1961 <- round(fares$fare_1961, 2)

p1971 <- (40.5/9.7)
fares$fare_1971 <- p1971*fares$fare_1912
fares$fare_1971 <- round(fares$fare_1971, 2)

p1981 <- (90.9/9.7)
fares$fare_1981 <- p1981*fares$fare_1912
fares$fare_1981 <- round(fares$fare_1981, 2)

p1991 <- (136.2/9.7)
fares$fare_1991 <- p1991*fares$fare_1912
fares$fare_1991 <- round(fares$fare_1991, 2)

p2001 <- (177.1/9.7)
fares$fare_2001 <- p2001*fares$fare_1912
fares$fare_2001 <- round(fares$fare_2001, 2)

p2011 <- (224.9/9.7)
fares$fare_2011 <- p2011*fares$fare_1912
fares$fare_2011 <- round(fares$fare_2011, 2)

p2021 <- (274.3/9.7)
fares$fare_2021 <- p2021*fares$fare_1912
fares$fare_2021 <- round(fares$fare_2021, 2)

Reshape data

fares_tidy <- fares %>%
   pivot_longer(cols = starts_with("fare"),
   names_to = "year",
   names_prefix = "fare_",
   values_to = "price", names_transform = list(year = as.integer))

Inflation-adjustment plot

# # PEER Review MV: Sweet graph!

fare_graph <- fares_tidy %>% 
   ggplot(aes(year, price, colour=accommodation)) +
   geom_line() +
   geom_point() +
   scale_colour_brewer(palette="Spectral") +
   facet_wrap(~ accommodation, 4, scales = "free") +
   xlim(1912,2021) +
   theme(panel.spacing = unit(1, "lines")) +
   labs(y = "Price ($USD)", x = "Year", title = "Inflation-Adjusted Titanic Ticket Prices",
        subtitle = "From 1912 to 2021", colour = "Accommodation") +
   theme_minimal()

ggplotly(fare_graph)

When taking inflation rates into consideration, we see that the average price for a first class cabin in 1912 was $150.00, which today would be $4,241.74

For this project, we used several packages. These included Rio Chan et al. (2021), Readr Wickham and Hester (2021), Here Müller (2020), Tidyverse Wickham et al. (2019), Janitor Firke (2021), Plotly Sievert (2020), Kableextra Zhu (2021), Ggridges Wilke (2021), and Party Hothorn, Hornik, and Zeileis (2006)

References

Chan, Chung-hong, Geoffrey CH Chan, Thomas J. Leeper, and Jason Becker. 2021. Rio: A Swiss-Army Knife for Data File i/o.
Firke, Sam. 2021. Janitor: Simple Tools for Examining and Cleaning Dirty Data. https://CRAN.R-project.org/package=janitor.
Hothorn, Torsten, Kurt Hornik, and Achim Zeileis. 2006. “Unbiased Recursive Partitioning: A Conditional Inference Framework.” Journal of Computational and Graphical Statistics 15 (3): 651–74.
Müller, Kirill. 2020. Here: A Simpler Way to Find Your Files. https://CRAN.R-project.org/package=here.
Sievert, Carson. 2020. Interactive Web-Based Data Visualization with r, Plotly, and Shiny. Chapman; Hall/CRC. https://plotly-r.com.
Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the tidyverse.” Journal of Open Source Software 4 (43): 1686. https://doi.org/10.21105/joss.01686.
Wickham, Hadley, and Jim Hester. 2021. Readr: Read Rectangular Text Data. https://CRAN.R-project.org/package=readr.
Wilke, Claus O. 2021. Ggridges: Ridgeline Plots in ’Ggplot2’. https://CRAN.R-project.org/package=ggridges.
Zhu, Hao. 2021. kableExtra: Construct Complex Table with ’Kable’ and Pipe Syntax. https://CRAN.R-project.org/package=kableExtra.